# setwd("~/Dropbox/T&G project survey/Ukraine/replication_JOP")
library(tidyverse)
library(scales)
library(haven)
source("func.R")
# Locale: switch the character-type category to the "C" locale
Sys.setlocale("LC_CTYPE", "C")

# ggplot defaults: one font family for both text-drawing geoms, then the
# project theme (theme_nice() is not defined here; presumably from func.R)
for (geom_name in c("text", "label")) {
  update_geom_defaults(geom_name, list(family = "Archivo Narrow"))
}
theme_set(theme_nice())

## ---------------------------------------------------------------
## Own survey - plot daily trends

# Daily aggregates of the national-ID item (Q60) from the cleaned survey.
# One row per `date` with:
#   natID            mean of Q60 over non-missing answers
#   natID_se         standard error of that mean
#   n                number of rows recorded on that day
#   n_cat            `n` binned for the colour legend of the daily plot
#   natID_upr/_lwr   normal-approximation 95% confidence bounds
survey = read_rds("data/clean_data.rds") %>%
  group_by(date) %>%
  summarize(
    natID = mean(Q60, na.rm = TRUE),
    # Missing answers are excluded from both the sd and the sample size, so a
    # single NA no longer turns the whole day's SE (and CI) into NA, and the
    # denominator matches the observations that actually enter the mean.
    natID_se = sd(Q60, na.rm = TRUE) / sqrt(sum(!is.na(Q60))),
    n = n()) %>%
  mutate(
    # Open-ended top bin: using max(n) as the last break fails (duplicated
    # breaks) or silently reshuffles the bins whenever the busiest day has
    # 200 rows or fewer.
    n_cat = cut(n, breaks = c(0, 10, 50, 200, Inf)),
    natID_upr = natID + qnorm(0.975) * natID_se,
    natID_lwr = natID - qnorm(0.975) * natID_se)

# Daily mean of national ID with 95% CI bars, coloured by the number of
# observations per day. Days with 10 or fewer rows are dropped, which is why
# only three colours/labels are supplied for the `n_cat` bins.
p = ggplot(survey %>% filter(n > 10), aes(x = date, y = natID, color = n_cat)) +
  geom_point() +
  geom_errorbar(aes(ymin = natID_lwr, ymax = natID_upr), width = 0) +
  labs(x = "", y = "Mean national ID") +
  # scale_y_continuous(limits = c(0, 10), breaks = c(5:10)) +
  # `date_breaks` is the documented argument for an interval string
  scale_x_date(date_breaks = "7 days", date_labels = "%b %d") +
  scale_color_manual(name = "Observations per day", values = c(gray(0.8), gray(0.5), "black"),
    labels = c("10-50", "50-200", "200+")) +
  theme(legend.position = "bottom")
# Save `p` explicitly instead of relying on ggplot2's global last_plot() state
ggsave("figures/natID_daily.pdf", plot = p, height = 4, width = 4.9, device = cairo_pdf)

## ---------------------------------------------------------------
## ESS (rounds 8-10)

# Stack the three ESS files, keeping only country, a year tag for the round
# and the attachment-to-country item. Values above 10 are set to NA
# (presumably the survey's missing-value codes -- confirm in the codebook).
ESS = map2(
    c("data/ESS/ESS8e02_2.csv", "data/ESS/ESS9e03_1.csv", "data/ESS/ESS10.csv"),
    c(2016, 2018, 2020),
    function(csv_path, round_year) {
      read.csv(csv_path) %>%
        mutate(year = round_year) %>%
        select(cntry, year, atchctr)
    }) %>%
  bind_rows() %>%
  mutate(atchctr = ifelse(atchctr > 10, NA, atchctr))

# Country-by-round mean of `atchctr` with its standard error and
# normal-approximation 95% confidence bounds.
ESS_natID = ESS %>%
  group_by(cntry, year) %>%
  summarize(
    natID_mean = mean(atchctr, na.rm = TRUE),
    # Divide by the number of non-missing answers: rows recoded to NA above
    # do not enter the mean or the sd, so counting them via n() would
    # understate the standard error.
    natID_se = sd(atchctr, na.rm = TRUE) / sqrt(sum(!is.na(atchctr))),
    # Return an ungrouped table rather than one still grouped by `cntry`
    .groups = "drop") %>%
  mutate(
    natID_upr = natID_mean + qnorm(0.975) * natID_se,
    natID_lwr = natID_mean - qnorm(0.975) * natID_se)


# Plot comparing Spain with rest of countries

# One line per country across the three rounds; Spain ("ES") is drawn on top
# in colour and labelled, every other country uses the default line colour.
p = ggplot(ESS_natID %>% filter(cntry!="ES"),
    aes(x = year, y = natID_mean, group = cntry)) +
  geom_line() +
  geom_point() +
  geom_line(data = ESS_natID %>% filter(cntry=="ES"),
    aes(x = year, y = natID_mean), color = "#F8766D") +
  geom_point(data = ESS_natID %>% filter(cntry=="ES"),
    aes(x = year, y = natID_mean), color = "#F8766D") +
  scale_x_continuous(breaks = c(2016, 2018, 2020),
    labels = c("ESS8\n(2016)", "ESS9\n(2018)", "ESS10\n(2020)")) +
  # Label position is hard-coded in data coordinates
  annotate("text", x = 2018.4, y = 7.55, label = "Spain", color = "#F8766D") +
  theme(legend.position = "none") +
  labs(x = "", y = "Mean national ID")
# Save `p` explicitly instead of relying on ggplot2's global last_plot() state
ggsave("figures/natID_ESS.pdf", plot = p, height = 3, width = 5, device = cairo_pdf)


## ---------------------------------------------------------------
## CIS 3028 & 3036 (2014-15), plus MD7809 (2009)

# Harmonise three CIS files to a common set of columns
# (ESTU, year, natID, ccaa, date, ideo) and stack them.
# readCIS() is not defined here (presumably from func.R).
cis = bind_rows(
  readCIS(folder = "data/CIS/MD3028") %>%
    transmute(
      ESTU = 3028, year = 2014,
      natID = P1003, ccaa = CCAA,
      date = "May-Jun 2014", ideo = P43),
  readCIS(folder = "data/CIS/MD3036") %>%
    transmute(
      ESTU = 3036, year = 2015,
      natID = P20_3, ccaa = CCAA,
      date = "Dec 2014-Jan 2015", ideo = P23),
  # This file already carries its own ESTU column; ccaa is converted to
  # character before stacking
  read_sav("data/CIS/MD7809/MD7809.sav") %>%
    transmute(
      ESTU, year = 2009,
      natID = Y2103, ccaa = as.character(CCAA),
      date = "Apr-May 2009", ideo = P33)
  ) %>%
  # Values above 10 are set to NA on both scales
  # (presumably non-response codes -- confirm in the codebooks)
  mutate(
    natID = ifelse(natID > 10, NA, natID),
    ideo = ifelse(ideo > 10, NA, ideo))

# Per-study mean of `natID` with its standard error, normal-approximation
# 95% confidence bounds, and a two-line axis label ("CIS <ESTU>\n(<date>)").
cis_agg = cis %>%
  group_by(ESTU, date) %>%
  summarize(
    natID_mean = mean(natID, na.rm = TRUE),
    # Divide by the number of non-missing answers: responses recoded to NA
    # do not enter the mean or the sd, so counting them via n() would
    # understate the standard error and narrow the plotted error bars.
    natID_se = sd(natID, na.rm = TRUE) / sqrt(sum(!is.na(natID))),
    # Return an ungrouped table rather than one still grouped by `ESTU`
    .groups = "drop") %>%
  mutate(
    natID_upr = natID_mean + qnorm(0.975) * natID_se,
    natID_lwr = natID_mean - qnorm(0.975) * natID_se,
    ESTUlab = paste0("CIS ", as.character(ESTU), "\n(", date, ")"))

# Point estimate and 95% CI per CIS study. `ESTUlab` is a character column,
# so the studies appear in alphabetical order of their labels.
p = ggplot(cis_agg, aes(x = ESTUlab, y = natID_mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = natID_lwr, ymax = natID_upr), width = 0) +
  labs(x = "", y = "Mean national ID") +
  scale_y_continuous(limits = c(6, 9))
# Save `p` explicitly instead of relying on ggplot2's global last_plot() state
ggsave("figures/natID_CIS.pdf", plot = p, height = 3, width = 4.9, device = cairo_pdf)
